PV-on-HVM: Add new ioreq 'invalidate' for zapping ioemu-dm mapcache
author kfraser@localhost.localdomain <kfraser@localhost.localdomain>
Thu, 5 Apr 2007 13:29:18 +0000 (14:29 +0100)
committer kfraser@localhost.localdomain <kfraser@localhost.localdomain>
Thu, 5 Apr 2007 13:29:18 +0000 (14:29 +0100)
after balloon operations in an HVM guest.

This removes the I/O port hack from the guest OS, and from ioemu.

Also we flush on reservation *increases* as well as decreases. This is
necessary until qemu-dm can demand-fault page mappings into existing
valid buckets.

Signed-off-by: Steven Hand <steven@xensource.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
12 files changed:
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
tools/ioemu/hw/xen_platform.c
tools/ioemu/target-i386-dm/helper2.c
unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
xen/arch/x86/hvm/hvm.c
xen/arch/x86/hvm/io.c
xen/arch/x86/hvm/platform.c
xen/arch/x86/hvm/svm/svm.c
xen/arch/x86/hvm/vmx/vmx.c
xen/include/asm-x86/hvm/io.h
xen/include/asm-x86/hvm/support.h
xen/include/public/hvm/ioreq.h

index 4978ea58dc2472fd03ee625e0b40f76b7dd9ef22..158c3b10cd69dd806c58c9bcb98e21eb90300c7e 100644 (file)
@@ -306,14 +306,6 @@ static int decrease_reservation(unsigned long nr_pages)
                balloon_append(pfn_to_page(pfn));
        }
 
-#ifndef CONFIG_XEN
-       /* XXX Temporary hack. */
-       {
-               extern void xen_invalidate_foreign_mappings(void);
-               xen_invalidate_foreign_mappings(); 
-       }
-#endif
-
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents   = nr_pages;
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
index cdff19c9d852fa93c69b061f2ab686c637ec2aca..47dd03b43ded45f915f63ea4f24ff12dde891bf5 100644 (file)
 
 extern FILE *logfile;
 
-static void platform_ioport_write(void *opaque, uint32_t addr, uint32_t val)
-{
-    if (val == 0)
-        qemu_invalidate_map_cache();
-}
-
 static void platform_ioport_map(PCIDevice *pci_dev, int region_num,
                                 uint32_t addr, uint32_t size, int type)
 {
-    register_ioport_write(addr, 1, 1, platform_ioport_write, NULL);
+    /* nothing yet */
 }
 
 static uint32_t platform_mmio_read(void *opaque, target_phys_addr_t addr)
index 7e6b56e961a6b9a1aa82669d19b10c285af7ee60..7beda011c6e9b1c69e40f3fb23dd8bdc074db014 100644 (file)
@@ -506,8 +506,11 @@ void __handle_ioreq(CPUState *env, ioreq_t *req)
         cpu_ioreq_xchg(env, req);
         break;
     case IOREQ_TYPE_TIMEOFFSET:
-       cpu_ioreq_timeoffset(env, req);
-       break;
+        cpu_ioreq_timeoffset(env, req);
+        break;
+    case IOREQ_TYPE_INVALIDATE:
+        qemu_invalidate_map_cache();
+        break;
     default:
         hw_error("Invalid ioreq type 0x%x\n", req->type);
     }
index c9f6d73109d52e7f375be79c2330de44a99d3aa6..8e69557ac8f9bc35f3f1c84a52ca4aa76a477ddd 100644 (file)
@@ -208,14 +208,6 @@ static uint64_t get_callback_via(struct pci_dev *pdev)
                ((uint64_t)(pin - 1) & 3));
 }
 
-/* Invalidate foreign mappings (e.g., in qemu-based device model). */
-static uint16_t invlmap_port;
-void xen_invalidate_foreign_mappings(void)
-{
-       outb(0, invlmap_port);
-}
-EXPORT_SYMBOL(xen_invalidate_foreign_mappings);
-
 static int __devinit platform_pci_init(struct pci_dev *pdev,
                                       const struct pci_device_id *ent)
 {
@@ -240,8 +232,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
                return -ENOENT;
        }
 
-       invlmap_port = ioaddr;
-
        if (request_mem_region(mmio_addr, mmio_len, DRV_NAME) == NULL)
        {
                printk(KERN_ERR ":MEM I/O resource 0x%lx @ 0x%lx busy\n",
index c68341662faf1b15e43e058f587955668995363c..5cb283bf2ede6f8869cb137522b8000fa643b40e 100644 (file)
@@ -521,32 +521,19 @@ static hvm_hypercall_t *hvm_hypercall_table[NR_hypercalls] = {
     HYPERCALL(hvm_op)
 };
 
-int hvm_do_hypercall(struct cpu_user_regs *pregs)
+static void __hvm_do_hypercall(struct cpu_user_regs *pregs)
 {
-    if ( unlikely(ring_3(pregs)) )
-    {
-        pregs->eax = -EPERM;
-        return 0;
-    }
-
     if ( (pregs->eax >= NR_hypercalls) || !hvm_hypercall_table[pregs->eax] )
     {
         if ( pregs->eax != __HYPERVISOR_grant_table_op )
             gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d bad hypercall %d.\n",
                      current->domain->domain_id, current->vcpu_id, pregs->eax);
         pregs->eax = -ENOSYS;
-        return 0;
+        return;
     }
 
-    /* Check for preemption: EIP will be modified from this dummy value. */
-    pregs->eip = 0xF0F0F0FF;
-
     pregs->eax = hvm_hypercall_table[pregs->eax](
         pregs->ebx, pregs->ecx, pregs->edx, pregs->esi, pregs->edi);
-
-    /* XXX: put fake IO instr here to inform the emulator to flush mapcache */
-
-    return (pregs->eip != 0xF0F0F0FF); /* preempted? */
 }
 
 #else /* defined(__x86_64__) */
@@ -606,14 +593,8 @@ static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
     HYPERCALL(event_channel_op)
 };
 
-int hvm_do_hypercall(struct cpu_user_regs *pregs)
+static void __hvm_do_hypercall(struct cpu_user_regs *pregs)
 {
-    if ( unlikely(ring_3(pregs)) )
-    {
-        pregs->rax = -EPERM;
-        return 0;
-    }
-
     pregs->rax = (uint32_t)pregs->eax; /* mask in case compat32 caller */
     if ( (pregs->rax >= NR_hypercalls) || !hvm_hypercall64_table[pregs->rax] )
     {
@@ -621,12 +602,9 @@ int hvm_do_hypercall(struct cpu_user_regs *pregs)
             gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d bad hypercall %ld.\n",
                      current->domain->domain_id, current->vcpu_id, pregs->rax);
         pregs->rax = -ENOSYS;
-        return 0;
+        return;
     }
 
-    /* Check for preemption: RIP will be modified from this dummy value. */
-    pregs->rip = 0xF0F0F0FF;
-
     if ( current->arch.paging.mode->guest_levels == 4 )
     {
         pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi,
@@ -643,14 +621,41 @@ int hvm_do_hypercall(struct cpu_user_regs *pregs)
                                                        (uint32_t)pregs->esi,
                                                        (uint32_t)pregs->edi);
     }
-
-    /* XXX: put fake IO instr here to inform the emulator to flush mapcache */
-
-    return (pregs->rip != 0xF0F0F0FF); /* preempted? */
 }
 
 #endif /* defined(__x86_64__) */
 
+int hvm_do_hypercall(struct cpu_user_regs *pregs)
+{
+    int flush, preempted;
+    unsigned long old_eip;
+
+    if ( unlikely(ring_3(pregs)) )
+    {
+        pregs->eax = -EPERM;
+        return 0;
+    }
+
+    /*
+     * NB. In future flush only on decrease_reservation.
+     * For now we also need to flush when pages are added, as qemu-dm is not
+     * yet capable of faulting pages into an existing valid mapcache bucket.
+     */
+    flush = ((uint32_t)pregs->eax == __HYPERVISOR_memory_op);
+
+    /* Check for preemption: RIP will be modified from this dummy value. */
+    old_eip = pregs->eip;
+    pregs->eip = 0xF0F0F0FF;
+
+    __hvm_do_hypercall(pregs);
+
+    preempted = (pregs->eip != 0xF0F0F0FF);
+    pregs->eip = old_eip;
+
+    return (preempted ? HVM_HCALL_preempted :
+            flush ? HVM_HCALL_invalidate : HVM_HCALL_completed);
+}
+
 void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3)
 {
     v->arch.hvm_vcpu.hw_cr3 = guest_cr3;
index 3177426484ed9429f0a2a048b0aeeea216cc9545..03820f0ebd1ae482a8f20a3e8f4c3d188e3a8b74 100644 (file)
@@ -845,10 +845,17 @@ void hvm_io_assist(void)
 
     p->state = STATE_IOREQ_NONE;
 
-    if ( p->type == IOREQ_TYPE_PIO )
+    switch ( p->type )
+    {
+    case IOREQ_TYPE_INVALIDATE:
+        goto out;
+    case IOREQ_TYPE_PIO:
         hvm_pio_assist(regs, p, io_opp);
-    else
+        break;
+    default:
         hvm_mmio_assist(regs, p, io_opp);
+        break;
+    }
 
     /* Copy register changes back into current guest state. */
     hvm_load_cpu_guest_regs(v, regs);
@@ -861,6 +868,7 @@ void hvm_io_assist(void)
         mark_dirty(d, gmfn);
     }
 
+ out:
     vcpu_end_shutdown_deferral(v);
 }
 
index 0862da27531bd475a8759a60ef8bf172f5576835..d333a92ace56e458c717c674480e2d4bad614e94 100644 (file)
@@ -941,6 +941,34 @@ void send_timeoffset_req(unsigned long timeoff)
         printk("Unsuccessful timeoffset update\n");
 }
 
+/* Ask ioemu mapcache to invalidate mappings. */
+void send_invalidate_req(void)
+{
+    struct vcpu *v = current;
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+
+    vio = get_vio(v->domain, v->vcpu_id);
+    if ( vio == NULL )
+    {
+        printk("bad shared page: %lx\n", (unsigned long) vio);
+        domain_crash_synchronous();
+    }
+
+    p = &vio->vp_ioreq;
+    if ( p->state != STATE_IOREQ_NONE )
+        printk("WARNING: send invalidate req with something "
+               "already pending (%d)?\n", p->state);
+
+    p->type = IOREQ_TYPE_INVALIDATE;
+    p->size = 4;
+    p->dir = IOREQ_WRITE;
+    p->data = ~0UL; /* flush all */
+    p->io_count++;
+
+    hvm_send_assist_req(v);
+}
+
 static void mmio_operands(int type, unsigned long gpa,
                           struct hvm_io_op *mmio_op,
                           unsigned char op_size)
index 39c94bdb311ea9824409d2267b7dcd74036b324e..ae0b42ae66c48a8176dac2ed283707c4a2563467 100644 (file)
@@ -2166,7 +2166,7 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
     unsigned long eip;
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    int inst_len;
+    int inst_len, rc;
 
     exit_reason = vmcb->exitcode;
     save_svm_cpu_user_regs(v, regs);
@@ -2275,8 +2275,13 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
         inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL);
         ASSERT(inst_len > 0);
         HVMTRACE_1D(VMMCALL, v, regs->eax);
-        if ( !hvm_do_hypercall(regs) )
-            __update_guest_eip(vmcb, inst_len); /* not preempted */
+        rc = hvm_do_hypercall(regs);
+        if ( rc != HVM_HCALL_preempted )
+        {
+            __update_guest_eip(vmcb, inst_len);
+            if ( rc == HVM_HCALL_invalidate )
+                send_invalidate_req();
+        }
         break;
 
     case VMEXIT_CR0_READ:
index 855274b8da79bca5ceab9656ae60a8778ce6dd4c..b4cfe17b294c61afc6a7e5f7205319c98292c5be 100644 (file)
@@ -2626,10 +2626,16 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
     }
     case EXIT_REASON_VMCALL:
     {
+        int rc;
         HVMTRACE_1D(VMMCALL, v, regs->eax);
         inst_len = __get_instruction_length(); /* Safe: VMCALL */
-        if ( !hvm_do_hypercall(regs) )
-            __update_guest_eip(inst_len); /* not preempted */
+        rc = hvm_do_hypercall(regs);
+        if ( rc != HVM_HCALL_preempted )
+        {
+            __update_guest_eip(inst_len);
+            if ( rc == HVM_HCALL_invalidate )
+                send_invalidate_req();
+        }
         break;
     }
     case EXIT_REASON_CR_ACCESS:
index c0663ccb0dc1eb947689be30e6900ba9cffb56e5..17c2f5831a9199fb30e573bec935ddc5c7e32a51 100644 (file)
@@ -147,6 +147,7 @@ static inline int irq_masked(unsigned long eflags)
 extern void send_pio_req(unsigned long port, unsigned long count, int size,
                          paddr_t value, int dir, int df, int value_is_ptr);
 void send_timeoffset_req(unsigned long timeoff);
+void send_invalidate_req(void);
 extern void handle_mmio(unsigned long gpa);
 extern void hvm_interrupt_post(struct vcpu *v, int vector, int type);
 extern void hvm_io_assist(void);
index 9880f920a09559baeb6a11d33bac642771d7cdd8..1d331ef5da2221787d5a6bfb0f1e1758af4592c3 100644 (file)
@@ -228,6 +228,9 @@ int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size);
 void hvm_print_line(struct vcpu *v, const char c);
 void hlt_timer_fn(void *data);
 
+#define HVM_HCALL_completed  0 /* hypercall completed - no further action */
+#define HVM_HCALL_preempted  1 /* hypercall preempted - re-execute VMCALL */
+#define HVM_HCALL_invalidate 2 /* invalidate ioemu-dm memory cache        */
 int hvm_do_hypercall(struct cpu_user_regs *pregs);
 
 void hvm_hlt(unsigned long rflags);
index 646246e854c837f3a5624e14ed33a077496cc4e1..063810cc74281f41f51749b6098be97d22e9ff2d 100644 (file)
@@ -40,6 +40,7 @@
 #define IOREQ_TYPE_XCHG         5
 #define IOREQ_TYPE_ADD          6
 #define IOREQ_TYPE_TIMEOFFSET   7
+#define IOREQ_TYPE_INVALIDATE   8 /* mapcache */
 
 /*
  * VMExit dispatcher should cooperate with instruction decoder to